{
  "cells": [
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "%matplotlib inline"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "\n# Gaussian Mixture Model Selection\n\n\nThis example shows that model selection can be performed with\nGaussian Mixture Models using information-theoretic criteria (BIC).\nModel selection concerns both the covariance type\nand the number of components in the model.\nIn that case, AIC also provides the right result (not shown to save time),\nbut BIC is better suited if the problem is to identify the right model.\nUnlike Bayesian procedures, such inferences are prior-free.\n\nIn that case, the model with 2 components and full covariance\n(which corresponds to the true generative model) is selected.\n\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "import numpy as np\nimport itertools\n\nfrom scipy import linalg\nimport matplotlib.pyplot as plt\nimport matplotlib as mpl\n\nfrom sklearn import mixture\n\nprint(__doc__)\n\n# Number of samples per component\nn_samples = 500\n\n# Generate random sample, two components\nnp.random.seed(0)\nC = np.array([[0., -0.1], [1.7, .4]])\nX = np.r_[np.dot(np.random.randn(n_samples, 2), C),\n          .7 * np.random.randn(n_samples, 2) + np.array([-6, 3])]\n\nlowest_bic = np.infty\nbic = []\nn_components_range = range(1, 7)\ncv_types = ['spherical', 'tied', 'diag', 'full']\nfor cv_type in cv_types:\n    for n_components in n_components_range:\n        # Fit a Gaussian mixture with EM\n        gmm = mixture.GaussianMixture(n_components=n_components,\n                                      covariance_type=cv_type)\n        gmm.fit(X)\n        bic.append(gmm.bic(X))\n        if bic[-1] < lowest_bic:\n            lowest_bic = bic[-1]\n            best_gmm = gmm\n\nbic = np.array(bic)\ncolor_iter = itertools.cycle(['navy', 'turquoise', 'cornflowerblue',\n                              'darkorange'])\nclf = best_gmm\nbars = []\n\n# Plot the BIC scores\nplt.figure(figsize=(8, 6))\nspl = plt.subplot(2, 1, 1)\nfor i, (cv_type, color) in enumerate(zip(cv_types, color_iter)):\n    xpos = np.array(n_components_range) + .2 * (i - 2)\n    bars.append(plt.bar(xpos, bic[i * len(n_components_range):\n                                  (i + 1) * len(n_components_range)],\n                        width=.2, color=color))\nplt.xticks(n_components_range)\nplt.ylim([bic.min() * 1.01 - .01 * bic.max(), bic.max()])\nplt.title('BIC score per model')\nxpos = np.mod(bic.argmin(), len(n_components_range)) + .65 +\\\n    .2 * np.floor(bic.argmin() / len(n_components_range))\nplt.text(xpos, bic.min() * 0.97 + .03 * bic.max(), '*', fontsize=14)\nspl.set_xlabel('Number of components')\nspl.legend([b[0] for b in bars], cv_types)\n\n# Plot the winner\nsplot = plt.subplot(2, 1, 2)\nY_ = clf.predict(X)\nfor i, (mean, cov, color) in enumerate(zip(clf.means_, clf.covariances_,\n                                           color_iter)):\n    v, w = linalg.eigh(cov)\n    if not np.any(Y_ == i):\n        continue\n    plt.scatter(X[Y_ == i, 0], X[Y_ == i, 1], .8, color=color)\n\n    # Plot an ellipse to show the Gaussian component\n    angle = np.arctan2(w[0][1], w[0][0])\n    angle = 180. * angle / np.pi  # convert to degrees\n    v = 2. * np.sqrt(2.) * np.sqrt(v)\n    ell = mpl.patches.Ellipse(mean, v[0], v[1], 180. + angle, color=color)\n    ell.set_clip_box(splot.bbox)\n    ell.set_alpha(.5)\n    splot.add_artist(ell)\n\nplt.xticks(())\nplt.yticks(())\nplt.title('Selected GMM: full model, 2 components')\nplt.subplots_adjust(hspace=.35, bottom=.02)\nplt.show()"
      ]
    }
  ],
  "metadata": {
    "kernelspec": {
      "display_name": "Python 3",
      "language": "python",
      "name": "python3"
    },
    "language_info": {
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      },
      "file_extension": ".py",
      "mimetype": "text/x-python",
      "name": "python",
      "nbconvert_exporter": "python",
      "pygments_lexer": "ipython3",
      "version": "3.6.9"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}